home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /************************************************************************/
- /* TITLE: SGML PARSER */
- /* SYSTEM: DTD PROCESSOR */
- /* SUBSYSTEM: */
- /* SOURCE FILE: DTUINP.C */
- /* AUTHOR: Jim Heath & Mike Garris */
- /* */
- /* DATE CREATED: */
- /* LAST MODIFIED: */
- /* */
- /* REVISIONS */
- /* WHEN WHO WHY */
- /************************************************************************/
- #include <stdio.h>
- #include <ctype.h>
- #include <setjmp.h>
- #include "qntyset.h"
- #include "dtd.h"
- #include "dtdglbl.h"
- #include "dtdfncs.h"
-
/* Capacity (number of entries) of the pushback stack below. */
#define STACKSIZE 1000

/* Pushback stack: jungetc() pushes onto it and jgetc() pops from it */
/* before reading anything from a file. */
int mystack[STACKSIZE];

/* Number of entries currently held in mystack (0 => stack is empty). */
int stkptr = 0;

/* External entity file currently being read by jgetc(), or NULL when */
/* no entity file is open. */
static FILE *entfile = NULL;
- /* ============================================================ */
- /* inpsep() uses jgetc() to input a series of multiple RS's, */
- /* RE's, SPACE's, EE's, and rsolves parameter entity references */
- /* processing any sequential occurances of the above in the */
- /* resolving text. The process continues inputing data until */
- /* a character not of the above types is found. Upon which, it */
- /* returns the number of separators encountered. Inpsep() can */
- /* be used for PS's, TS's, and DS's because these above char's */
- /* are common to all three. It is the caller's responsibilty to */
- /* impose restrictions and checks according to which separator */
- /* is to be used. */
- /* ============================================================ */
- inpsep(septype)
- int septype;
- {
- int c;
- int sepcount = 0;
- for(;;){
- if ((c = jgetc()) == EOF)
- return(EOF);
- switch ((char) c) {
- /* all common to PS's, TS's, and DS's */
- case RS:
- case RE:
- case SPACE:
- case EE:
- case TAB:
- sepcount++;
- break;
- /* must check if parameter entity reference */
- case PERO:
- if ((c = jgetc()) == EOF)
- return(EOF);
- /* if it is a parameter entity reference ...*/
- if(ISALPHA(c)){
- /* set flag to show we are in an entity */
- SETFLAG(IN_ENTITY);
- jungetc(c);
- /* call procedure to resolve reference at separator level */
- reslvpref();
- /* bump separator counter because a parameter entity reference */
- /* is considered a separator */
- sepcount++;
- break;
- }
- /* otherwise, not a parameter entity reference */
- else{
- /* so unget char and PERO, and return */
- jungetc(c);
- jungetc(PERO);
- return(sepcount);
- }
- /* if '-', then check if comment */
- case '-':
- /* if separator type is not PS, then comment can not exist */
- if(septype != PS){
- /* unget '-' and return */
- jungetc(c);
- return(sepcount);
- }
- if ((c = jgetc()) == EOF)
- return(EOF);
- /* otherwise if comment ...*/
- if (c == '-') {
- /* call procedure to input comment */
- inpcomment();
- /* bump separator counter because a comment is a PS */
- sepcount++;
- break;
- }
- /* if not a comment, then unget char and '-' returning */
- jungetc(c);
- jungetc('-');
- return(sepcount);
- /* if non of the above characters, then unget char and return */
- default:
- jungetc(c);
- return(sepcount);
- }
- }
- }
-
- /* ============================================================ */
- /* reslvpref() resolves any parameter entity reference at a */
- /* separator level. It inputs the entity name, searches a table */
- /* on the name getting its entity text, and 'ungets' the text */
- /* for further processing. */
- /* ============================================================ */
- void reslvpref()
- {
- char namearray[NAMELEN + 1], *nameptr = namearray;
- char *resptr;
- char tarray[LITLEN + 1], *tptr = tarray;
- int c;
- int synkey;
-
- /* input the parameter entity name */
- if(INPNAME(&nameptr, NAMELEN - 1, noxlat) >= GOOD){
- nameptr = namearray;
- /* search a table for a match on the name returning a */
- /* syntactic literal and the text associated with the name */
- synkey = search(PARM_ENT_NAME, nameptr, &resptr);
- switch(synkey){
- /* if the syntactic literal is 'ILLCHAR' then the */
- /* search was unsuccessful */
- case ILLCHAR:
- syntxerr("Entity Reference not found in table");
- break;
- case KW_PUBLIC:
- case KW_SYSTEM:
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while resolving parameter entity reference");
- if(c != REFC)
- jungetc(c);
- if (strlen(resptr))
- cknotation(resptr);
- if (strlen(entfilename) == 0) {
- jungetc(EE);
- break;
- }
- entfile = safefopen(entfilename, "r", ENTITYFILE);
- break;
- case KW_MD:
- case KW_STARTTAG:
- case KW_ENDTAG:
- /* if the bracketed text keyword MD was found ... */
- /* add the delimeters to the test string */
- if (synkey == KW_MD)
- sprintf(tptr,"<!%s>",resptr);
- else if (synkey == KW_STARTTAG)
- sprintf(tptr,"<%s>",resptr);
- else if (synkey == KW_ENDTAG)
- sprintf(tptr,"</%s>",resptr);
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while resolving parameter entity reference");
- if(c != REFC)
- jungetc(c);
- /* call procedure which ungets the text string */
- ungetreslv(tarray);
- break;
- case KW_MS:
- /* add the delimeters to the test string */
- sprintf(tptr,"<![%s]]>",resptr);
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while resolving parameter entity reference");
- if(c != REFC)
- jungetc(c);
- /* call procedure which ungets the text string */
- ungetreslv(tarray);
- break;
-
- /* ***** must also add cases for STARTTAG and ENDTAG ***** */
-
- /* if syntactic literal is NULL, then entity text consists */
- /* of only a parameter literal */
- case NULL:
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while resolving parameter entity reference");
- if(c != REFC)
- jungetc(c);
- /* unget parameter literal */
- ungetreslv(resptr);
- break;
- /* any other syntactic literal is illegal */
- default:
- syntxerr("Illegal use of syntactic literal in a PS entity reference");
- break;
- }
- }
- else
- syntxerr("Reference name not found in PS entity reference");
- }
-
- /* ============================================================ */
- /* ungetreslv() puts a text string onto the "unget" stack in */
- /* reverse order. First pushing an EE onto the stack marking */
- /* the end of and enitity text. */
- /* ============================================================ */
- void ungetreslv(resptr)
- char *resptr;
- {
- int len;
- REGISTER char *endptr = resptr;
-
- len = strlen(resptr);
- jungetc(EE);
- if(len > 0){
- endptr += len - 1;
- while(endptr != resptr)
- jungetc(*endptr--);
- jungetc(*endptr);
- }
- }
- /* ============================================================ */
- inpitem(recptr, len, firstchar, remchar, xlat, rniflag)
- char **recptr;
- int len;
- int (*firstchar)(), (*remchar)(), (*xlat)();
- int rniflag;
- {
- char tmparray[NAMELEN + 2], *tbuff = tmparray;
- char *temp = *recptr;
- REGISTER int c, j;
- int ccount = 0, index;
-
- if(len > NAMELEN)
- terminate(1, "length exceeds NAMELEN in getitem()");
- if ((c = (*xlat)(jgetc())) == EOF)
- return(EOF);
- ccount++;
- if (rniflag == YES)
- if (c == RNI) {
- len++;
- xlat = TOUPPER;
- goto L1;
- }
- if (!(*firstchar)(c)) {
- jungetc(c);
- return(BAD);
- }
- L1:
- *tbuff++ = c;
- while (--len) {
- if ((c = (*xlat)(jgetc())) == EOF)
- return(EOF);
- if ((*remchar)(c)) {
- ccount++;
- *tbuff++ = c;
- }
- else {
- jungetc(c);
- break;
- }
- }
- if ((c = (*xlat)(jgetc())) == EOF)
- return(EOF);
- jungetc(c);
- if ((*remchar)(c)) {
- for (; ccount > 0; ccount--)
- jungetc(tmparray[ccount]);
- return(BAD);
- }
- for (j = 0; j < ccount; j++)
- *temp++ = tmparray[j];
- *temp = '\0';
- index = getkwindex(*recptr);
- *recptr = temp;
- return(index);
- }
- /* ============================================================ */
- void inpcomment()
- {
- REGISTER int c;
- for(;;) {
- if ((c = jgetc()) == EOF)
- terminate(1, "EOF found while processing comment");
- if (c != '-')
- continue;
- if ((c = jgetc()) == EOF)
- terminate(1, "EOF found while processing comment");
- if (c == '-')
- return;
- }
- }
- /* ============================================================ */
- int inpMDO()
- {
- REGISTER int c;
-
- if ((c = jgetc()) == EOF)
- return(EOF);
- if (c != '<') {
- jungetc(c);
- return(BAD);
- }
- if ((c = jgetc()) == EOF)
- return(EOF);
- if (c != '!') {
- jungetc(c);
- jungetc('<');
- return(BAD);
- }
- ADDCHAR('<');
- ADDCHAR('!');
- return(GOOD);
- }
-
- /* ============================================================ */
- /* inpparmlit() inputs a parameter literal delemeted by either */
- /* LIT's or LITA's. It only recognizes PERO's ans CRO's as */
- /* mark-up due to the standards definition of replaceable */
- /* parameter data. */
- /* ============================================================ */
- void inpparmlit(litptr)
- char **litptr;
- {
- int delimeter, len = 0;
- REGISTER int c, d;
- char *lptr = *litptr;
- int refflag = 0; /* flag if 1 => parameter entity reference found in */
- /* parameter literal with unmatch EE */
- /* if 0 => no unmatched reference currently in */
- /* parameter literal */
-
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while processing parameter literal");
- /* if LIT then delimeter is LIT */
- if(c == LIT)
- delimeter = LIT;
- /* else delimeter is LITA */
- else
- if(c == LITA)
- delimeter = LITA;
- else{
- ADDCHAR(c);
- jungetc(c);
- syntxerr("Delimeter not found in parameter literal");
- }
- /* while closeing delimeter not found ...*/
- while((c = jgetc()) != delimeter){
- switch ((char) c){
- case EE:
- /* EE found not matching any existing reference within par. lit. */
- if(refflag == 0)
- syntxerr("EE found terminating reference not occurring within parameter literal");
- refflag = 0;
- break;
- /* if PERO, then check if entity reference */
- case PERO:
- if ((c = jgetc()) == EOF)
- terminate(1, "End of File found while interpreting parameter literal");
- /* if reference ... */
- if(ISALPHA(c)){
- jungetc(c);
- /* call procedure to resolve parameter entity reference within */
- /* replaceable parameter data */
- reslvreplpref(&lptr, &len);
- /* set flag to unmatched reference found in parameter literal */
- refflag = 1;
- }
- else{
- /* otherwise, no reference, so treat PERO as char data */
- *lptr++ = PERO;
- *lptr++ = c;
- *lptr = '\0';
- len += 2;
- }
- break;
- /* if '&', then check if character reference */
- case '&':
- if((c = jgetc()) == '#') {
- if ((d = jgetc()) == EOF)
- terminate(1, "End of File found while interpreting parameter literal");
- if(ISDIGIT(d) || isnmstrt(d)) {
- jungetc(d);
- /* if CRO, then call procedure to resolve char reference */
- reslvcharref(&lptr, &len);
- }
- else{
- *lptr++ = '&';
- *lptr++ = c;
- *lptr++ = d;
- *lptr = '\0';
- len += 3;
- }
- }
- /* otherwise, treat '&' as char data */
- else{
- jungetc(c);
- *lptr++ = '&';
- *lptr = '\0';
- len += 1;
- }
- break;
- /* default => char data */
- default:
- *lptr++ = c;
- *lptr = '\0';
- len++;
- break;
- }
- /* if length of interpretted parameter literal */
- /* exceeds LITLEN then ERROR */
- if(len > LITLEN){
- ADDCHAR(LIT);
- ADDSTRING(lptr);
- syntxerr("LITLEN is exceeded in parameter literal");
- }
- }
- ADDCHAR(LIT);
- ADDSTRING(*litptr);
- ADDCHAR(LIT);
- *litptr = lptr;
- }
-
- /* ============================================================ */
- /* reslvcharref() resolves character references. It determines */
- /* if character reference consists of a function name or char */
- /* number. If reference consists of a character number, then */
- /* an a to i translation routine is called and appropriate */
- /* character is added to the literal string. */
- /* ============================================================ */
- void reslvcharref(litptr, lenptr)
- char **litptr;
- int *lenptr;
- {
- REGISTER int j;
- char namearray[NAMELEN + 1], *nameptr = namearray;
- char *lptr = *litptr;
-
- /* input name */
- if((j = INPNAME(&nameptr, NAMELEN, TOUPPER)) >= GOOD){
- switch (j){
- /* if name is valid then assume char reference containing function name*/
- case RE:
- *lptr++ = RE;
- *lptr = '\0';
- *lenptr = *lenptr + 1;
- break;
- case RS:
- *lptr++ = RS;
- *lptr = '\0';
- *lenptr = *lenptr + 1;
- break;
- case SPACE:
- *lptr++ = SPACE;
- *lptr = '\0';
- *lenptr = *lenptr + 1;
- break;
- /* if any other name found then ERROR */
- default:
- ADDSTRING(synliteral(j));
- syntxerr("Unknown function name in character reference");
- break;
- }
- }
- /* otherwise, no name found, so assume character number */
- else{
- if((j = jgetc()) == EOF)
- terminate(1, "End of File found while resolving character reference");
- /* if next character inputted is not numeric then ERROR */
- if(ISALPHA(j)){
- ADDCHAR(j);
- jungetc(j);
- syntxerr("Character number expected while resolving character reference");
- }
- jungetc(j);
- /* otherwise call procedure to add character represented to lit string */
- xlatcharnum(&lptr, lenptr);
- }
- if((j = jgetc()) == EOF)
- terminate(1, "End of File found while resolving character reference");
- if(j != REFC)
- jungetc(j);
- *litptr = lptr;
- }
-
- /* ============================================================ */
- /* xlatcharnum() inputs an ascii number string and converts it */
- /* to its character equivalent adding it to the literal string. */
- /* ============================================================ */
- void xlatcharnum(litptr, lenptr)
- char **litptr;
- int *lenptr;
- {
- REGISTER int j;
- int xlatcharnum;
- char *lptr = *litptr, charnum[MAX_CHAR_IN_DELIM_NUM + 1];
- REGISTER char *cptr = charnum;
-
- if((j = jgetc()) == EOF)
- terminate(1, "End of File found while resolving character reference");
- if(!ISDIGIT(j)){
- ADDCHAR(j);
- jungetc(j);
- syntxerr("Character number not found in resolving character reference");
- }
- while(ISDIGIT(j)){
- *cptr++ = j;
- *cptr = '\0';
- if((j = jgetc()) == EOF)
- terminate(1, "End of File found while resolving character reference");
- }
- jungetc(j);
- xlatcharnum = atoi(charnum);
- *lptr++ = xlatcharnum;
- *lptr = '\0';
- *lenptr = *lenptr + 1;
- *litptr = lptr;
- }
-
- /* ============================================================ */
- /* reslvreplpref() resolves parameter entity references within */
- /* replaceable parameter data. It inputs the reference name, */
- /* searches a table on the name, and adds then entity text to */
- /* the interpretted literal string. */
- /* ============================================================ */
- void reslvreplpref(litptr, lenptr)
- char **litptr;
- int *lenptr;
- {
- REGISTER char *lptr = *litptr;
- char namearray[NAMELEN + 1], *nameptr = namearray;
- int c, len;
- char *resptr;
- int synkey;
-
- /* input name */
- if(INPNAME(&nameptr, NAMELEN - 1, noxlat) >= GOOD){
- nameptr = namearray;
- synkey = search(PARM_ENT_NAME, nameptr, &resptr);
- switch(synkey){
- /* if syntactic literal is MD then add appropriate delimeters */
- /* to resolved text */
- case KW_MD:
- *lptr++ = '<';
- *lptr++ = '!';
- strcat(lptr, resptr);
- len = strlen(resptr);
- lptr += len;
- *lptr++ = MDC;
- *lptr = '\0';
- len += 3;
- break;
-
- /* ***** add bracketed text keywords STARTTAG, ENDTAG, and MD ***** */
-
- /* if syntactic literal is 'ILLCHAR' then search unsuccessful */
- case ILLCHAR:
- syntxerr("Entity reference not found in table");
- break;
- /* if syntactic literal is NULL, then entity text consists of */
- /* parameter literal only */
- case NULL:
- strcat(lptr, resptr);
- len = strlen(resptr);
- lptr += len;
-
- /* *lptr++ = EE;*/
- *lptr = '\0';
-
- break;
- /* any other syntactic literal found is ERROR */
- default:
- syntxerr("Unknown syntactic literal in parameter entity reference.");
- break;
- }
- }
- else
- syntxerr("Reference name not found while resolving parameter entity reference.");
- /* increment length of interpretted literal by length of entity text */
- *lenptr = *lenptr + len;
- if((c = jgetc()) == EOF)
- terminate(1, "End of File found while resolving parameter entity referecne");
- if(c != REFC)
- jungetc(c);
- *litptr = lptr;
- }
- /* ============================================================ */
- int jgetc()
- {
- extern int debug;
- REGISTER int temp;
- if(stkptr > 0)
- temp = mystack[--stkptr];
- else if (entfile != NULL)
- temp = getc(entfile);
- else
- temp = getc(docfile);
- if ((temp == CTRLZ) || (temp == EOF)) {
- if (entfile != NULL) {
- safefclose(entfile, entfilename, ENTITYFILE);
- entfile = NULL;
- temp = EE;
- }
- else
- temp = EOF;
- }
- if (debug & 1) {
- if ((temp > ' ') && (temp < 0x7f))
- printf("jgetc returns %c\n", temp);
- else
- printf("jgetc returns 0x%02x\n", temp);
- }
- if ((char) temp == EE){
- CLEARFLAG(IN_ENTITY);
- if (TESTFLAG(IN_DECL) && TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- }
- return(temp);
- }
- /* ============================================================ */
- void jungetc(c)
- REGISTER int c;
- {
- extern int debug;
- if (debug & 1) {
- if ((c > ' ') && (c < 0x7f))
- printf("jungetc returns %c\n", c);
- else
- printf("jungetc returns 0x%02x\n", c);
- }
- if(stkptr >= STACKSIZE)
- terminate(1,"User Stack Over-Flow!");
- mystack[stkptr++] = c;
- }
- /* ============================================================ */
- void stackinit()
- {
- stkptr = 0;
- }
-
- /* ============================================================ */
- /* getkwindex() returns the defined string equivalent of an */
- /* inputted integer. */
- /* ============================================================ */
- int getkwindex(s)
- char *s;
- {
- REGISTER int jj;
- char *j, *k = s;
- char chararray[NAMELEN + 1];
- static char *keywords[] = {
- "DOCTYPE", "ELEMENT", "ENTITY", "RNIDEFAULT",
- "ANY", "CDATA", "RCDATA", "SDATA", "PI", "EMPTY",
- "STARTTAG", "ENDTAG", "MS", "MD", "ATTLIST",
- "ID", "IDREF", "IDREFS", "NAME", "NAMES",
- "NMTOKEN", "NMTOKENS", "NOTATION", "NUMBER",
- "NUMBERS", "NUTOKEN", "NUTOKENS", "#REQUIRED",
- "#CURRENT", "#CONREF", "#IMPLIED", "#FIXED","",
- "","","SYSTEM", "PUBLIC", "NDATA" };
- #define MAXKEYWORDS ((sizeof(keywords))/(sizeof(char *)))
- static int keytokens[] =
- {
- KW_DOCTYPE, KW_ELEMENT, KW_ENTITY, KW_RNIDEFAULT,
- KW_ANY, KW_CDATA, KW_RCDATA, KW_SDATA, KW_PI, KW_EMPTY,
- KW_STARTTAG, KW_ENDTAG, KW_MS, KW_MD, KW_ATTLIST,
- KW_ID, KW_IDREF, KW_IDREFS, KW_NAME, KW_NAMES,
- KW_NMTOKEN, KW_NMTOKENS, KW_NOTATION, KW_NUMBER,
- KW_NUMBERS, KW_NUTOKEN, KW_NUTOKENS, KW_REQUIRED,
- KW_CURRENT, KW_CONREF, KW_IMPLIED, KW_FIXED, KW_GROUP,
- KW_LIT, KW_UNFIXED, KW_SYSTEM, KW_PUBLIC, KW_NDATA };
-
- for (jj = 0; jj < MAXKEYWORDS; jj++) {
- if(strcmp(s, keywords[jj]) == 0)
- return(keytokens[jj]);
- }
- if (strcmp(s, "RE") == 0)
- return(RE);
- if (strcmp(s, "RS") == 0)
- return(RS);
- if (strcmp(s, "SPACE") == 0)
- return(SPACE);
-
- for(j = chararray; (*j = TOUPPER(*k)) != EOS; j++, k++);
- if(strcmp(chararray, "#DEFAULT") == 0)
- return(KW_RNIDEFAULT);
- return(GOOD);
- }
-